## Parsed with column specification:
## cols(
## instant = col_double(),
## dteday = col_date(format = ""),
## season = col_double(),
## yr = col_double(),
## mnth = col_double(),
## holiday = col_double(),
## weekday = col_double(),
## workingday = col_double(),
## weathersit = col_double(),
## temp = col_double(),
## atemp = col_double(),
## hum = col_double(),
## windspeed = col_double(),
## casual = col_double(),
## registered = col_double(),
## cnt = col_double()
## )
## # A tibble: 6 x 16
## instant dteday season yr mnth holiday weekday workingday weathersit
## <dbl> <date> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 1 2011-01-01 1 0 1 0 6 0 2
## 2 2 2011-01-02 1 0 1 0 0 0 2
## 3 3 2011-01-03 1 0 1 0 1 1 1
## 4 4 2011-01-04 1 0 1 0 2 1 1
## 5 5 2011-01-05 1 0 1 0 3 1 1
## 6 6 2011-01-06 1 0 1 0 4 1 1
## # … with 7 more variables: temp <dbl>, atemp <dbl>, hum <dbl>, windspeed <dbl>,
## # casual <dbl>, registered <dbl>, cnt <dbl>
## # A tibble: 6 x 16
## instant dteday season yr mnth holiday weekday workingday weathersit
## <dbl> <date> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 726 2012-12-26 1 1 12 0 3 1 3
## 2 727 2012-12-27 1 1 12 0 4 1 2
## 3 728 2012-12-28 1 1 12 0 5 1 2
## 4 729 2012-12-29 1 1 12 0 6 0 2
## 5 730 2012-12-30 1 1 12 0 0 0 1
## 6 731 2012-12-31 1 1 12 0 1 1 2
## # … with 7 more variables: temp <dbl>, atemp <dbl>, hum <dbl>, windspeed <dbl>,
## # casual <dbl>, registered <dbl>, cnt <dbl>
## instant dteday season yr
## Min. : 1.0 Min. :2011-01-01 Min. :1.000 Min. :0.0000
## 1st Qu.:183.5 1st Qu.:2011-07-02 1st Qu.:2.000 1st Qu.:0.0000
## Median :366.0 Median :2012-01-01 Median :3.000 Median :1.0000
## Mean :366.0 Mean :2012-01-01 Mean :2.497 Mean :0.5007
## 3rd Qu.:548.5 3rd Qu.:2012-07-01 3rd Qu.:3.000 3rd Qu.:1.0000
## Max. :731.0 Max. :2012-12-31 Max. :4.000 Max. :1.0000
## mnth holiday weekday workingday
## Min. : 1.00 Min. :0.00000 Min. :0.000 Min. :0.000
## 1st Qu.: 4.00 1st Qu.:0.00000 1st Qu.:1.000 1st Qu.:0.000
## Median : 7.00 Median :0.00000 Median :3.000 Median :1.000
## Mean : 6.52 Mean :0.02873 Mean :2.997 Mean :0.684
## 3rd Qu.:10.00 3rd Qu.:0.00000 3rd Qu.:5.000 3rd Qu.:1.000
## Max. :12.00 Max. :1.00000 Max. :6.000 Max. :1.000
## weathersit temp atemp hum
## Min. :1.000 Min. :0.05913 Min. :0.07907 Min. :0.0000
## 1st Qu.:1.000 1st Qu.:0.33708 1st Qu.:0.33784 1st Qu.:0.5200
## Median :1.000 Median :0.49833 Median :0.48673 Median :0.6267
## Mean :1.395 Mean :0.49538 Mean :0.47435 Mean :0.6279
## 3rd Qu.:2.000 3rd Qu.:0.65542 3rd Qu.:0.60860 3rd Qu.:0.7302
## Max. :3.000 Max. :0.86167 Max. :0.84090 Max. :0.9725
## windspeed casual registered cnt
## Min. :0.02239 Min. : 2.0 Min. : 20 Min. : 22
## 1st Qu.:0.13495 1st Qu.: 315.5 1st Qu.:2497 1st Qu.:3152
## Median :0.18097 Median : 713.0 Median :3662 Median :4548
## Mean :0.19049 Mean : 848.2 Mean :3656 Mean :4504
## 3rd Qu.:0.23321 3rd Qu.:1096.0 3rd Qu.:4776 3rd Qu.:5956
## Max. :0.50746 Max. :3410.0 Max. :6946 Max. :8714
## Classes 'spec_tbl_df', 'tbl_df', 'tbl' and 'data.frame': 731 obs. of 16 variables:
## $ instant : num 1 2 3 4 5 6 7 8 9 10 ...
## $ dteday : Date, format: "2011-01-01" "2011-01-02" ...
## $ season : num 1 1 1 1 1 1 1 1 1 1 ...
## $ yr : num 0 0 0 0 0 0 0 0 0 0 ...
## $ mnth : num 1 1 1 1 1 1 1 1 1 1 ...
## $ holiday : num 0 0 0 0 0 0 0 0 0 0 ...
## $ weekday : num 6 0 1 2 3 4 5 6 0 1 ...
## $ workingday: num 0 0 1 1 1 1 1 0 0 1 ...
## $ weathersit: num 2 2 1 1 1 1 2 2 1 1 ...
## $ temp : num 0.344 0.363 0.196 0.2 0.227 ...
## $ atemp : num 0.364 0.354 0.189 0.212 0.229 ...
## $ hum : num 0.806 0.696 0.437 0.59 0.437 ...
## $ windspeed : num 0.16 0.249 0.248 0.16 0.187 ...
## $ casual : num 331 131 120 108 82 88 148 68 54 41 ...
## $ registered: num 654 670 1229 1454 1518 ...
## $ cnt : num 985 801 1349 1562 1600 ...
## - attr(*, "spec")=
## .. cols(
## .. instant = col_double(),
## .. dteday = col_date(format = ""),
## .. season = col_double(),
## .. yr = col_double(),
## .. mnth = col_double(),
## .. holiday = col_double(),
## .. weekday = col_double(),
## .. workingday = col_double(),
## .. weathersit = col_double(),
## .. temp = col_double(),
## .. atemp = col_double(),
## .. hum = col_double(),
## .. windspeed = col_double(),
## .. casual = col_double(),
## .. registered = col_double(),
## .. cnt = col_double()
## .. )
## instant dteday season yr
## Min. : 1.0 Min. :2011-01-01 Min. :1.000 Min. :0.0000
## 1st Qu.:183.5 1st Qu.:2011-07-02 1st Qu.:2.000 1st Qu.:0.0000
## Median :366.0 Median :2012-01-01 Median :3.000 Median :1.0000
## Mean :366.0 Mean :2012-01-01 Mean :2.497 Mean :0.5007
## 3rd Qu.:548.5 3rd Qu.:2012-07-01 3rd Qu.:3.000 3rd Qu.:1.0000
## Max. :731.0 Max. :2012-12-31 Max. :4.000 Max. :1.0000
## mnth holiday weekday workingday
## Min. : 1.00 Min. :0.00000 Min. :0.000 Min. :0.000
## 1st Qu.: 4.00 1st Qu.:0.00000 1st Qu.:1.000 1st Qu.:0.000
## Median : 7.00 Median :0.00000 Median :3.000 Median :1.000
## Mean : 6.52 Mean :0.02873 Mean :2.997 Mean :0.684
## 3rd Qu.:10.00 3rd Qu.:0.00000 3rd Qu.:5.000 3rd Qu.:1.000
## Max. :12.00 Max. :1.00000 Max. :6.000 Max. :1.000
## weathersit temp atemp hum
## Min. :1.000 Min. :0.05913 Min. :0.07907 Min. :0.0000
## 1st Qu.:1.000 1st Qu.:0.33708 1st Qu.:0.33784 1st Qu.:0.5200
## Median :1.000 Median :0.49833 Median :0.48673 Median :0.6267
## Mean :1.395 Mean :0.49538 Mean :0.47435 Mean :0.6279
## 3rd Qu.:2.000 3rd Qu.:0.65542 3rd Qu.:0.60860 3rd Qu.:0.7302
## Max. :3.000 Max. :0.86167 Max. :0.84090 Max. :0.9725
## windspeed casual registered cnt
## Min. :0.02239 Min. : 2.0 Min. : 20 Min. : 22
## 1st Qu.:0.13495 1st Qu.: 315.5 1st Qu.:2497 1st Qu.:3152
## Median :0.18097 Median : 713.0 Median :3662 Median :4548
## Mean :0.19049 Mean : 848.2 Mean :3656 Mean :4504
## 3rd Qu.:0.23321 3rd Qu.:1096.0 3rd Qu.:4776 3rd Qu.:5956
## Max. :0.50746 Max. :3410.0 Max. :6946 Max. :8714
# Recode the integer-coded categorical columns as labelled factors.
# Each factor is stored in a new column; the numeric originals are kept.
day$seasoning <- factor(
  day$season,
  levels = c(1, 2, 3, 4),
  labels = c("Winter", "Spring", "Summer", "Fall")
)
day$year <- factor(
  day$yr,
  levels = c(0, 1),
  labels = c("2011", "2012")
)
day$hol <- factor(
  day$holiday,
  levels = c(0, 1),
  labels = c("Not Holiday", "Holiday")
)
day$wd <- factor(
  day$weekday,
  levels = c(0, 1, 2, 3, 4, 5, 6),
  labels = c(
    "Sunday", "Monday", "Tuesday", "Wednesday",
    "Thursday", "Friday", "Saturday"
  )
)
day$working <- factor(
  day$workingday,
  levels = c(0, 1),
  labels = c("Not Working Day", "Working Day")
)
# Level 4 ("Lousy") is declared even though the summary below shows it has
# no observations.
day$weather <- factor(
  day$weathersit,
  levels = c(1, 2, 3, 4),
  labels = c("Good", "Cloudy", "Wet", "Lousy")
)
summary(day)
## instant dteday season yr
## Min. : 1.0 Min. :2011-01-01 Min. :1.000 Min. :0.0000
## 1st Qu.:183.5 1st Qu.:2011-07-02 1st Qu.:2.000 1st Qu.:0.0000
## Median :366.0 Median :2012-01-01 Median :3.000 Median :1.0000
## Mean :366.0 Mean :2012-01-01 Mean :2.497 Mean :0.5007
## 3rd Qu.:548.5 3rd Qu.:2012-07-01 3rd Qu.:3.000 3rd Qu.:1.0000
## Max. :731.0 Max. :2012-12-31 Max. :4.000 Max. :1.0000
##
## mnth holiday weekday workingday
## Min. : 1.00 Min. :0.00000 Min. :0.000 Min. :0.000
## 1st Qu.: 4.00 1st Qu.:0.00000 1st Qu.:1.000 1st Qu.:0.000
## Median : 7.00 Median :0.00000 Median :3.000 Median :1.000
## Mean : 6.52 Mean :0.02873 Mean :2.997 Mean :0.684
## 3rd Qu.:10.00 3rd Qu.:0.00000 3rd Qu.:5.000 3rd Qu.:1.000
## Max. :12.00 Max. :1.00000 Max. :6.000 Max. :1.000
##
## weathersit temp atemp hum
## Min. :1.000 Min. :0.05913 Min. :0.07907 Min. :0.0000
## 1st Qu.:1.000 1st Qu.:0.33708 1st Qu.:0.33784 1st Qu.:0.5200
## Median :1.000 Median :0.49833 Median :0.48673 Median :0.6267
## Mean :1.395 Mean :0.49538 Mean :0.47435 Mean :0.6279
## 3rd Qu.:2.000 3rd Qu.:0.65542 3rd Qu.:0.60860 3rd Qu.:0.7302
## Max. :3.000 Max. :0.86167 Max. :0.84090 Max. :0.9725
##
## windspeed casual registered cnt seasoning
## Min. :0.02239 Min. : 2.0 Min. : 20 Min. : 22 Winter:181
## 1st Qu.:0.13495 1st Qu.: 315.5 1st Qu.:2497 1st Qu.:3152 Spring:184
## Median :0.18097 Median : 713.0 Median :3662 Median :4548 Summer:188
## Mean :0.19049 Mean : 848.2 Mean :3656 Mean :4504 Fall :178
## 3rd Qu.:0.23321 3rd Qu.:1096.0 3rd Qu.:4776 3rd Qu.:5956
## Max. :0.50746 Max. :3410.0 Max. :6946 Max. :8714
##
## year hol wd working
## 2011:365 Not Holiday:710 Sunday :105 Not Working Day:231
## 2012:366 Holiday : 21 Monday :105 Working Day :500
## Tuesday :104
## Wednesday:104
## Thursday :104
## Friday :104
## Saturday :105
## weather
## Good :463
## Cloudy:247
## Wet : 21
## Lousy : 0
##
##
##
## [1] 0
## The dataset has no missing data.
# Outliers Detection
## Subset the data by dropping the categorical variables and columns 14-15 (casual and registered) for now.
# Keep only the columns used for the assumption checks. They are selected by
# name instead of by negative position, so the subset stays correct if columns
# are added to, removed from, or reordered in `day`.
nocat <- day[, c("mnth", "workingday", "temp", "atemp", "hum", "windspeed", "cnt")]
## Outlier detecting with mahalanobis distance
# Squared Mahalanobis distance of every row from the vector of column means,
# using the sample covariance matrix of the same columns.
mahal <- mahalanobis(nocat, colMeans(nocat), cov(nocat))
mahal
## [1] 10.268865 7.621839 7.036662 5.961186 6.910391 9.933541
## [7] 6.573209 7.914417 13.249294 7.005850 7.974819 7.844763
## [13] 8.157304 8.205320 8.432440 7.821059 8.048934 9.978601
## [19] 5.440898 4.943166 9.796207 11.895396 9.881564 8.436761
## [25] 6.301065 12.293055 9.312673 8.848046 8.250943 12.140716
## [31] 5.876293 12.492898 6.428397 6.757065 6.123505 12.093803
## [37] 6.791330 10.634334 8.775205 6.501072 6.816131 8.148888
## [43] 5.906885 6.905365 12.317502 8.550533 5.038033 4.116809
## [49] 5.289272 26.758594 7.229926 7.212997 5.219142 10.444506
## [55] 4.263925 7.590403 5.433898 6.302602 9.322686 3.571473
## [61] 4.987758 8.796769 3.433009 6.346412 16.084742 6.727396
## [67] 7.982114 4.725667 33.070173 3.038677 4.024918 4.719285
## [73] 5.670675 2.943045 3.927959 2.263086 4.000514 11.075657
## [79] 4.846683 4.758604 2.886971 5.683812 6.193291 3.769429
## [85] 6.786280 6.158850 8.910670 7.518007 3.855954 9.498339
## [91] 3.632114 3.737260 4.317630 10.146484 8.481624 3.393949
## [97] 2.121745 5.543472 7.732667 6.629243 6.179487 5.450839
## [103] 5.513799 4.486827 3.620388 13.902969 5.265129 3.173506
## [109] 2.202801 2.321585 6.024551 3.558821 8.468156 5.459034
## [115] 3.027197 6.467859 7.953977 5.512067 2.918392 4.237152
## [121] 4.966565 1.666079 6.439028 5.606799 3.445275 1.535264
## [127] 3.726710 5.608219 1.814527 3.881699 2.280087 2.201580
## [133] 3.924331 7.664745 6.197085 2.991254 5.732616 4.306428
## [139] 3.801729 2.401684 4.085213 4.074839 3.920347 3.033099
## [145] 2.160485 2.863609 2.699888 4.652736 6.387778 7.264360
## [151] 7.472025 5.452593 10.608188 5.751025 6.148943 3.775972
## [157] 4.059807 2.650509 5.723279 8.799229 4.751416 4.716188
## [163] 4.835702 4.572790 4.441395 2.947223 2.278792 2.357211
## [169] 4.979753 5.932689 2.453477 2.578745 3.610236 4.198196
## [175] 3.652590 5.261357 6.750819 3.392420 3.975705 5.240495
## [181] 4.777609 10.125069 10.339580 4.859731 6.547355 4.934761
## [187] 3.456385 4.099776 4.206502 5.414828 5.551392 8.083174
## [193] 6.702264 4.653441 3.805352 1.450712 3.722564 4.762808
## [199] 4.127864 6.667513 7.653632 20.819632 20.582838 17.916988
## [205] 14.241427 8.003351 6.100742 10.803875 6.375060 10.638076
## [211] 11.133052 11.347777 7.197605 7.244595 6.204592 3.074908
## [217] 2.683661 5.964723 8.301764 5.357846 5.882294 9.464976
## [223] 7.821768 8.316538 4.914080 6.384050 2.487541 3.391567
## [229] 4.431332 4.511574 3.475900 4.974900 7.308849 5.571152
## [235] 3.771741 2.660617 4.390226 4.255103 19.583507 7.662607
## [241] 2.863966 3.000974 4.139689 2.059557 2.124743 4.292069
## [247] 4.855955 6.639859 11.143736 10.900613 13.746123 8.791243
## [253] 4.403378 4.367778 3.535963 2.357330 2.237579 3.957761
## [259] 1.578213 3.113690 2.873775 1.369712 4.225604 5.636681
## [265] 8.692296 18.352653 7.069807 9.733930 5.459446 6.706988
## [271] 5.413054 1.594121 1.442891 8.100081 5.850061 4.608499
## [277] 2.115091 2.022631 2.121480 5.931779 6.313588 5.488342
## [283] 6.516203 2.748139 7.791861 6.263515 3.077966 6.228915
## [289] 6.944677 2.188470 2.605056 7.342889 13.679394 2.749242
## [295] 5.311701 4.968088 2.874520 1.924140 2.257336 4.234912
## [301] 3.586054 17.548784 4.567684 4.308093 3.267412 5.149102
## [307] 3.268689 4.937711 6.119885 6.739545 5.797762 5.058706
## [313] 5.673852 4.730894 9.936242 5.501642 8.223062 7.246968
## [319] 2.696257 8.538559 6.909574 8.714448 6.513028 4.756312
## [325] 6.511985 9.945371 9.289923 9.949817 6.148920 7.948875
## [331] 5.585807 3.164121 6.524087 5.518559 6.380082 6.615700
## [337] 8.278445 8.399243 7.115627 9.688213 14.289612 7.154085
## [343] 7.660658 8.842209 14.351253 9.465361 6.642904 7.998825
## [349] 5.547163 6.938479 8.906785 8.609969 5.756857 8.162934
## [355] 7.299315 8.059823 7.909082 11.935060 11.039749 11.705582
## [361] 8.394681 10.136822 8.811444 6.135730 8.126212 6.174793
## [367] 9.337468 11.965149 8.600194 5.874867 4.345681 5.777652
## [373] 5.924424 7.805604 4.439290 8.580019 6.813402 9.500381
## [379] 7.923994 8.503632 6.774590 8.405721 11.432714 6.031868
## [385] 5.632816 11.332054 11.204470 12.242090 9.275000 5.607518
## [391] 8.907399 8.927231 6.022650 8.850759 5.771895 4.879510
## [397] 3.389308 2.958860 3.639823 8.300056 6.029917 4.278301
## [403] 4.314281 5.400849 4.073124 5.369231 9.209784 15.733663
## [409] 6.240262 4.723266 3.216370 6.477719 3.574888 5.208545
## [415] 5.380957 5.005737 3.491918 3.322583 2.909376 4.450742
## [421] 13.629479 6.583306 3.569670 4.639951 5.512074 2.397786
## [427] 2.913671 3.944120 7.561797 3.587537 4.255811 6.184778
## [433] 16.492058 10.603751 7.150304 4.947144 2.882991 3.269914
## [439] 4.516336 3.174136 6.108342 13.266372 9.173940 4.767288
## [445] 6.272719 7.767375 8.885577 9.581037 7.898925 11.059495
## [451] 10.181544 7.785126 4.731417 6.988142 4.326000 10.702165
## [457] 6.174926 6.019039 7.444213 4.021949 5.812995 7.547404
## [463] 11.571407 9.306195 9.633731 4.001969 4.331263 4.391318
## [469] 5.130245 8.014529 6.146503 6.519413 7.778893 1.601775
## [475] 5.901418 6.140465 8.572434 11.651766 6.709550 3.663755
## [481] 5.353028 2.820001 9.119990 5.027770 6.106647 2.216183
## [487] 1.596773 4.217175 4.107041 2.896807 6.673716 5.717424
## [493] 3.660680 4.813924 2.208419 6.499005 6.350635 7.051388
## [499] 4.128282 3.836382 2.891967 4.733591 4.559230 5.096589
## [505] 10.172500 6.165688 3.651751 3.484066 3.467486 3.608533
## [511] 4.231415 5.916637 5.638795 5.153360 5.287159 3.276443
## [517] 3.793119 3.148853 7.671124 6.081806 5.613143 3.515235
## [523] 4.602787 3.337066 4.608621 7.748353 5.981432 2.875169
## [529] 4.465856 8.769781 4.917802 3.520963 5.971746 4.777497
## [535] 2.010945 2.750170 4.420095 5.235508 3.485904 7.788161
## [541] 7.163746 5.553587 9.949545 7.000550 6.220204 7.804337
## [547] 6.595118 7.605565 6.412833 6.453135 6.349348 6.275914
## [553] 7.617974 11.610919 11.270038 2.474216 2.307657 3.795250
## [559] 3.966133 6.786175 4.875203 4.787745 3.066128 5.901827
## [565] 4.572885 2.831205 4.806558 6.411359 7.256109 2.914657
## [571] 4.003544 6.464742 6.475949 3.283073 4.832582 4.271537
## [577] 2.684221 3.116134 3.280903 3.029101 3.685822 7.391912
## [583] 8.368574 2.923117 3.334673 3.595032 2.966273 3.049723
## [589] 4.919233 5.603591 3.425234 2.367381 2.758219 4.581097
## [595] 450.050485 6.540250 5.054837 2.415110 4.246644 4.472428
## [601] 5.222983 5.076309 4.923461 7.144335 2.928030 2.973791
## [607] 4.058973 5.031963 4.338455 5.288554 6.856560 4.962812
## [613] 4.787191 4.471263 3.174633 3.762909 8.186393 7.677452
## [619] 5.907805 5.270687 4.050751 4.700427 4.748460 10.271302
## [625] 6.222528 3.029307 12.167883 4.997935 4.571943 5.048198
## [631] 10.903755 8.576922 4.802408 5.985962 4.995341 3.194906
## [637] 3.280681 10.209777 5.113222 3.986309 5.435661 7.531062
## [643] 4.072965 5.111243 10.876956 4.487625 5.721253 5.185781
## [649] 5.525442 7.121121 6.737725 8.313892 7.804253 5.747032
## [655] 5.673471 6.491556 7.703462 2.928609 8.865473 6.183501
## [661] 5.443059 4.702919 5.170487 5.821725 6.309736 10.383800
## [667] 13.793409 17.888839 7.158404 3.960292 5.030881 6.989753
## [673] 8.328218 6.437192 6.418924 6.731885 9.274383 12.641702
## [679] 5.584143 9.067901 7.578236 6.053964 9.159463 6.639162
## [685] 5.095469 5.360773 7.018331 6.273943 4.755932 5.524317
## [691] 5.149771 11.289732 4.684932 15.974964 9.927605 9.466132
## [697] 7.200722 6.410434 6.909472 8.531541 11.020218 8.189847
## [703] 6.278147 5.570924 9.450983 8.380455 6.513444 11.206186
## [709] 8.750152 8.636602 8.636035 6.900323 7.830187 7.480778
## [715] 7.745413 7.970823 7.724280 5.233610 5.695773 5.096564
## [721] 12.378674 19.464526 11.816352 12.147030 9.894699 15.553442
## [727] 13.485708 6.979471 11.424271 15.251901 7.980102
## [1] 24.32189
## Mode FALSE TRUE
## logical 3 728
## Classes 'tbl_df', 'tbl' and 'data.frame': 728 obs. of 7 variables:
## $ mnth : num 1 1 1 1 1 1 1 1 1 1 ...
## $ workingday: num 0 0 1 1 1 1 1 0 0 1 ...
## $ temp : num 0.344 0.363 0.196 0.2 0.227 ...
## $ atemp : num 0.364 0.354 0.189 0.212 0.229 ...
## $ hum : num 0.806 0.696 0.437 0.59 0.437 ...
## $ windspeed : num 0.16 0.249 0.248 0.16 0.187 ...
## $ cnt : num 985 801 1349 1562 1600 ...
## mnth workingday temp atemp hum
## mnth 1.000000000 -0.005900951 0.22020534 0.22745863 0.22220369
## workingday -0.005900951 1.000000000 0.05265981 0.05218228 0.02432705
## temp 0.220205335 0.052659810 1.00000000 0.99170155 0.12696294
## atemp 0.227458630 0.052182275 0.99170155 1.00000000 0.13998806
## hum 0.222203691 0.024327046 0.12696294 0.13998806 1.00000000
## windspeed -0.207501752 -0.018796487 -0.15794412 -0.18364297 -0.24848910
## cnt 0.279977112 0.061156063 0.62749401 0.63106570 -0.10065856
## windspeed cnt
## mnth -0.20750175 0.27997711
## workingday -0.01879649 0.06115606
## temp -0.15794412 0.62749401
## atemp -0.18364297 0.63106570
## hum -0.24848910 -0.10065856
## windspeed 1.00000000 -0.23454500
## cnt -0.23454500 1.00000000
## corrplot 0.84 loaded
## m wr t a h wn c
## mnth 1
## workingday 1
## temp 1
## atemp B 1
## hum 1
## windspeed 1
## cnt , , 1
## attr(,"legend")
## [1] 0 ' ' 0.3 '.' 0.6 ',' 0.8 '+' 0.9 '*' 0.95 'B' 1
## The dataset meets the assumption of additivity.
# Linearity
# Draw one chi-square value (df = 7) per row of `nocat` and regress it on
# every column of `nocat`; the fitted values are reused further down.
random <- rchisq(n = nrow(nocat), df = 7)
fake <- lm(formula = random ~ ., data = nocat)
summary(fake)
##
## Call:
## lm(formula = random ~ ., data = nocat)
##
## Residuals:
## Min 1Q Median 3Q Max
## -6.5232 -2.6629 -0.5341 1.8824 17.6860
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 7.717e+00 9.966e-01 7.743 3.28e-14 ***
## mnth 1.358e-03 4.235e-02 0.032 0.974
## workingday -3.334e-01 2.908e-01 -1.147 0.252
## temp 5.896e+00 5.851e+00 1.008 0.314
## atemp -7.475e+00 6.621e+00 -1.129 0.259
## hum 7.892e-01 1.055e+00 0.748 0.455
## windspeed -1.412e+00 1.903e+00 -0.742 0.458
## cnt -9.291e-05 9.755e-05 -0.952 0.341
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 3.644 on 723 degrees of freedom
## Multiple R-squared: 0.01057, Adjusted R-squared: 0.0009884
## F-statistic: 1.103 on 7 and 723 DF, p-value: 0.3591
## The dataset meets the assumption of linearity.
# Normality
library(moments)
# Column-wise skewness, ignoring missing values.
# NOTE(review): `noout` is not created in the visible part of this file;
# presumably it is the outlier-filtered subset of `nocat` -- confirm.
skewness(noout, na.rm = TRUE)
## mnth workingday temp atemp hum windspeed
## -0.01156011 -0.79187422 -0.05615175 -0.13708890 0.06051464 0.62476208
## cnt
## -0.04455265
## mnth workingday temp atemp hum windspeed cnt
## 1.792561 1.627065 1.880405 2.015374 2.465593 3.174248 2.191819
## The numbers are mostly centered around zero at the bottom. Despite outliers, the numbers are evenly distributed. The dataset meets the assumption of normality.
# Homogeneity and Homoscedasticity
# Centre and scale the fitted values of the fake regression.
fitvalues <- scale(fake[["fitted.values"]])
# NOTE(review): `standardized` is not created in the visible part of this
# file -- confirm it is defined before this line runs.
plot(fitvalues, standardized)
## Date[1:731], format: "2011-01-01" "2011-01-02" "2011-01-03" "2011-01-04" "2011-01-05" ...
## 'dteday' is of Date type. The dataset records bike sharing information for every single day from 2011-01-01 to 2012-12-31.
# 3.2 season
# Mean daily rental count per season.
season_cnt <- tapply(day$cnt, day$seasoning, mean)
season_cnt
## Winter Spring Summer Fall
## 2604.133 4992.332 5644.303 4728.163
library(ggplot2)
# Shared theme tweaks: no grid or panel background, black axis lines,
# white legend keys and a larger base text size.
cleanup <- theme(
  panel.grid.major = element_blank(),
  panel.grid.minor = element_blank(),
  panel.background = element_blank(),
  axis.line.x = element_line(color = "black"),
  axis.line.y = element_line(color = "black"),
  legend.key = element_rect(fill = "white"),
  text = element_text(size = 15)
)
# Average daily rentals per season with one dodged bar per year.
# Columns are named bare inside aes() so they are looked up in the `data`
# argument; writing `day$col` inside aes() is discouraged by ggplot2 and
# produces legend titles such as "day$year".
season_count_yr <- ggplot(day, aes(x = seasoning, y = cnt)) +
  geom_bar(
    aes(fill = year),
    stat = "summary",
    fun.y = "mean",
    position = "dodge"
  ) +
  xlab("Season") +
  ylab("Average Daily Rental Bikes") +
  ggtitle("Avg Daily Rental Bike VS Season") +
  theme(plot.title = element_text(hjust = 0.5)) +
  cleanup
season_count_yr
## Winter is the lowest bike rental season, while summer is the peak bike rental season. Compared with 2011, bike rentals increased in every season in 2012.
# 3.3 yr
# Mean daily rental count per year.
yr_cnt <- tapply(day$cnt, day$year, mean)
yr_cnt
## 2011 2012
## 3405.762 5599.934
# Each plot below is built, assigned and then printed as a separate
# statement. Previously the print of one plot was fused with the assignment
# of the next, so the plots were stored under the wrong names and
# `yr_count_work` was never defined.
# aes() uses bare column names so they are resolved in `day`.

# Average daily rentals per year.
yr_count <- ggplot(day, aes(x = year, y = cnt)) +
  geom_bar(stat = "summary", fun.y = "mean") +
  xlab("Year") +
  ylab("Average Daily Rental Bikes") +
  ggtitle("Average Daily Rental Bike VS Year") +
  theme(plot.title = element_text(hjust = 0.5)) +
  cleanup
yr_count

# Same plot with the bars split by day of the week.
yr_count_weekday <- ggplot(day, aes(x = year, y = cnt)) +
  geom_bar(aes(fill = wd), stat = "summary", fun.y = "mean") +
  xlab("Year") +
  ylab("Average Daily Rental Bikes") +
  ggtitle("Average Daily Rental Bike VS Year") +
  theme(plot.title = element_text(hjust = 0.5)) +
  cleanup
yr_count_weekday

# Same plot with the bars split by working / non-working day.
yr_count_work <- ggplot(day, aes(x = year, y = cnt)) +
  geom_bar(aes(fill = working), stat = "summary", fun.y = "mean") +
  xlab("Year") +
  ylab("Average Daily Rental Bikes") +
  ggtitle("Average Daily Rental Bike VS Year/Workingday") +
  theme(plot.title = element_text(hjust = 0.5)) +
  cleanup
yr_count_work
## Compared with 2011, bike rentals in 2012 increased significantly on every day of the week. Both working days and non-working days saw an increase in 2012.
# 3.4 months
# Treat the month as a categorical variable from here on.
day$mnth <- as.factor(day$mnth)
# Mean daily rental count per month.
month_cnt <- tapply(day$cnt, day$mnth, mean)
month_cnt
## 1 2 3 4 5 6 7 8
## 2176.339 2655.298 3692.258 4484.900 5349.774 5772.367 5563.677 5664.419
## 9 10 11 12
## 5766.517 5199.226 4247.183 3403.806
# Average daily rentals per month. aes() uses bare column names so they are
# resolved in `day` (`day$col` inside aes() is discouraged by ggplot2).
mnth_cnt <- ggplot(day, aes(x = mnth, y = cnt)) +
  geom_bar(stat = "summary", fun.y = "mean") +
  xlab("Month") +
  ylab("Average Daily Rental Bike") +
  ggtitle("Average Daily Rental Bike VS Month") +
  theme(plot.title = element_text(hjust = 0.5)) +
  cleanup
mnth_cnt
## While summer is the peak season for bike rental, June is the peak month. The months from May to October are the peak months, requiring more maintenance.
# 3.5 holidays
# Mean daily rental count on holidays vs. non-holidays.
holiday_cnt <- tapply(day$cnt, day$hol, mean)
holiday_cnt
## Not Holiday Holiday
## 4527.104 3735.000
# Average daily rentals by holiday flag. aes() uses bare column names so they
# are resolved in `day` (`day$col` inside aes() is discouraged by ggplot2).
hol_cnt <- ggplot(day, aes(x = hol, y = cnt)) +
  geom_bar(stat = "summary", fun.y = "mean") +
  xlab("Holiday") +
  ylab("Average Daily Rental Bike") +
  ggtitle("Average Daily Rental Bike VS Holiday") +
  theme(plot.title = element_text(hjust = 0.5)) +
  cleanup
hol_cnt
## Over the two years only 21 days are holidays (710 days are not). The average daily rental count on holidays is lower than on non-holidays.
# 3.6 weekday
# Mean daily rental count per day of the week.
weekday_cnt <- tapply(day$cnt, day$wd, mean)
weekday_cnt
## Sunday Monday Tuesday Wednesday Thursday Friday Saturday
## 4228.829 4338.124 4510.663 4548.538 4667.260 4690.288 4550.543
# Average daily rentals per day of the week. aes() uses bare column names so
# they are resolved in `day` (`day$col` inside aes() is discouraged).
wd_cnt <- ggplot(day, aes(x = wd, y = cnt)) +
  geom_bar(stat = "summary", fun.y = "mean") +
  xlab("Weekday") +
  ylab("Average Daily Rental Bike") +
  ggtitle("Average Daily Rental Bike VS Weekday") +
  theme(plot.title = element_text(hjust = 0.5)) +
  cleanup
# Printing `wd_cnt` and defining `wd_cnt1` are separate statements; having
# both on one line (`wd_cnt wd_cnt1 <- ...`) is a syntax error.
wd_cnt

# Same plot with the bars split by season.
wd_cnt1 <- ggplot(day, aes(x = wd, y = cnt)) +
  geom_bar(aes(fill = seasoning), stat = "summary", fun.y = "mean") +
  xlab("Weekday") +
  ylab("Average Daily Rental Bike") +
  ggtitle("Average Daily Rental Bike VS Weekday/Season") +
  theme(plot.title = element_text(hjust = 0.5)) +
  cleanup
wd_cnt1
## The average daily rental count does not differ much between days of the week. In winter, however, people tend to choose other means of transportation.
# 3.7 workingday
# Mean daily rental count on working vs. non-working days.
workingday_cnt <- tapply(day$cnt, day$working, mean)
workingday_cnt
## Not Working Day Working Day
## 4330.169 4584.820
# Average daily rentals by working-day flag. aes() uses bare column names so
# they are resolved in `day` (`day$col` inside aes() is discouraged).
working_cnt <- ggplot(day, aes(x = working, y = cnt)) +
  geom_bar(stat = "summary", fun.y = "mean") +
  xlab("Workingdays") +
  ylab("Average Daily Rental Bike") +
  ggtitle("Average Daily Rental Bike VS Workingdays") +
  theme(plot.title = element_text(hjust = 0.5)) +
  cleanup
working_cnt
## The average daily rental count is higher on working days than on non-working days, which means working days require more maintenance and bike supply.
# 3.8 weathersit
# Mean daily rental count per weather category. "Lousy" has no observations,
# so its mean is NA.
weather1 <- tapply(day$cnt, day$weather, mean)
weather1
## Good Cloudy Wet Lousy
## 4876.786 4035.862 1803.286 NA
# Same summary stored under a second name (kept as in the original script).
weather_cnt <- tapply(day$cnt, day$weather, mean)
# Average daily rentals per weather category. aes() uses bare column names so
# they are resolved in `day` (`day$col` inside aes() is discouraged).
wea_cnt <- ggplot(day, aes(x = weather, y = cnt)) +
  geom_bar(stat = "summary", fun.y = "mean") +
  xlab("Weather") +
  ylab("Average Daily Rental Bike") +
  ggtitle("Average Daily Rental Bike VS Weather") +
  theme(plot.title = element_text(hjust = 0.5)) +
  cleanup
wea_cnt
## The worse the weather conditions, the fewer bikes are rented.
# 3.9 temp
# Rental count against normalized temperature, drawn as vertical lines.
plot(
  day$temp, day$cnt,
  type = "h", col = "blue",
  xlab = "actual temperature", ylab = "total bike cnt"
)
# Bin the temperature into five equal-width buckets. This is its own
# statement: having it on the same line as the plot() call above is a syntax
# error. `include.lowest = TRUE` closes the first interval at 0 so a value of
# exactly 0 would not become NA.
day$tempbucket <- cut(
  x = day$temp,
  breaks = c(0, 0.20, 0.40, 0.60, 0.80, 1.00),
  labels = c("Very Low", "Low", "Medium", "High", "Very High"),
  include.lowest = TRUE
)
table(day$tempbucket)
##
## Very Low Low Medium High Very High
## 34 227 216 238 16
## Very Low Low Medium High Very High
## 1543.235 3178.546 5011.208 5714.340 4765.312
# Average daily rentals per temperature bucket. aes() uses bare column names
# so they are resolved in `day` (`day$col` inside aes() is discouraged).
temp_count <- ggplot(day, aes(x = tempbucket, y = cnt)) +
  geom_bar(stat = "summary", fun.y = "mean", position = position_dodge()) +
  xlab("Temp") +
  ylab("Average Daily Rental Bike") +
  ggtitle("Average Daily Rental Bike VS Temperature") +
  theme(plot.title = element_text(hjust = 0.5)) +
  cleanup
temp_count
## When the temperature is very low, the rental count drops.
# Daily rental counts stacked per month, each day coloured by its
# temperature on a blue-green-red scale centred on the mean temperature.
# aes() uses bare column names so they are resolved in `day`; this also makes
# the legend title "temp" instead of "day$temp".
temp_month <- ggplot(day, mapping = aes(x = mnth, y = cnt, fill = temp)) +
  geom_bar(stat = "identity") +
  scale_fill_gradient2(
    low = "blue", high = "red", mid = "green",
    midpoint = mean(day$temp)
  ) +
  xlab("Month") +
  ylab("Bike Rental") +
  ggtitle("Rental Bike VS Temp/Month") +
  theme(plot.title = element_text(hjust = 0.5)) +
  cleanup
temp_month
# 3.9.1 atemp
# Rental count against the normalized "feels like" temperature.
plot(
  day$atemp, day$cnt,
  type = "h", col = "lightblue",
  xlab = "Actual Feel Temperature", ylab = "Total Bike Cnt"
)
# 3.10 humidity
# Rental count against normalized humidity.
plot(
  day$hum, day$cnt,
  type = "h", col = "lightgreen",
  xlab = "Humidity", ylab = "Total Bike Cnt"
)
# Bin the humidity into five equal-width buckets. This is its own statement:
# having it on the same line as the plot() call above is a syntax error.
# `include.lowest = TRUE` closes the first interval at 0. The minimum of `hum`
# is 0, and with the default left-open interval that row became NA, which is
# why the counts recorded below add up to 730 of 731 rows. The recorded
# output predates this fix.
day$humbucket <- cut(
  x = day$hum,
  breaks = c(0, 0.20, 0.40, 0.60, 0.80, 1.00),
  labels = c("0.0-0.2", "0.2-0.4", "0.4-0.6", "0.6-0.8", "0.8-1.0"),
  include.lowest = TRUE
)
table(day$humbucket)
##
## 0.0-0.2 0.2-0.4 0.4-0.6 0.6-0.8 0.8-1.0
## 1 28 292 317 92
## 0.0-0.2 0.2-0.4 0.4-0.6 0.6-0.8 0.8-1.0
## 1635.000 4454.429 4601.185 4760.148 3404.174
# Average daily rentals per humidity bucket. aes() uses bare column names so
# they are resolved in `day` (`day$col` inside aes() is discouraged).
hum_count <- ggplot(day, aes(x = humbucket, y = cnt)) +
  geom_bar(stat = "summary", fun.y = "mean", position = position_dodge()) +
  xlab("Humidity") +
  ylab("Rental Bikes") +
  ggtitle("Rental Bike VS Humidity") +
  theme(plot.title = element_text(hjust = 0.5)) +
  cleanup
hum_count
## When the humidity is very low, the bike rental count is low.
# 3.11 windspeed
# Rental count against normalized wind speed.
plot(
  day$windspeed, day$cnt,
  type = "h", col = "darkgreen",
  xlab = "Windspeed", ylab = "Total Bike Cnt"
)
# Bin the wind speed into five buckets. This is its own statement: having it
# on the same line as the plot() call above is a syntax error.
# The last break is open-ended because the maximum wind speed (0.50746) is
# above 0.5; with an upper break of 0.5 that observation became NA.
# `include.lowest = TRUE` closes the first interval at 0.
day$windbucket <- cut(
  x = day$windspeed,
  breaks = c(0.0, 0.1, 0.2, 0.3, 0.4, Inf),
  labels = c("Very Low", "Low", "Medium", "High", "Very High"),
  include.lowest = TRUE
)
# Tabulate the buckets, not the raw wind speeds (tabulating `day$windspeed`
# lists almost every distinct value once). The recorded output below predates
# this change.
table(day$windbucket)
##
## 0.0223917 0.0423042 0.0454042 0.0454083 0.04665 0.047275 0.0503792 0.0528708
## 1 1 1 1 1 1 1 1
## 0.053213 0.057225 0.0578458 0.0584708 0.0597042 0.0609583 0.0615708 0.0621958
## 1 1 1 1 1 1 1 1
## 0.0622083 0.06345 0.0640708 0.0659292 0.0665417 0.0665458 0.0684208 0.0690375
## 1 2 1 1 1 1 1 1
## 0.0702833 0.0721458 0.0727708 0.0727792 0.0733958 0.0739826 0.0746375 0.0771167
## 1 1 1 1 1 1 1 1
## 0.077125 0.0772304 0.0783667 0.0783833 0.08085 0.0814792 0.0814833 0.0820917
## 1 1 1 1 1 1 1 1
## 0.0827167 0.0827208 0.082725 0.0833333 0.0833458 0.0839583 0.0839625 0.083975
## 1 2 1 1 1 1 1 1
## 0.0845958 0.08645 0.088913 0.0895583 0.0895652 0.0901833 0.0908042 0.0908083
## 1 1 1 1 1 1 1 1
## 0.091425 0.0920542 0.0926667 0.0939208 0.094113 0.0945333 0.0945458 0.0957833
## 1 2 1 1 1 1 1 1
## 0.0964042 0.0970208 0.0982583 0.0988958 0.0989 0.0995125 0.100133 0.100742
## 1 1 1 1 1 1 1 1
## 0.100754 0.101371 0.101379 0.102 0.102608 0.103246 0.103863 0.104467
## 1 1 1 2 1 1 1 1
## 0.104475 0.10635 0.106354 0.107588 0.108213 0.10855 0.110087 0.1107
## 1 3 1 1 1 1 1 3
## 0.110704 0.110708 0.111329 0.112562 0.113187 0.113192 0.113812 0.113817
## 1 1 1 1 1 1 1 1
## 0.113837 0.114429 0.115054 0.115062 0.115522 0.115671 0.1163 0.116908
## 1 1 1 2 1 1 1 2
## 0.116929 0.117537 0.117546 0.117562 0.118167 0.118171 0.118787 0.118792
## 1 1 1 1 2 1 1 3
## 0.119408 0.119412 0.120642 0.12065 0.121271 0.121896 0.122132 0.122512
## 1 1 1 2 1 2 1 2
## 0.123133 0.123142 0.1233 0.123767 0.124375 0.124379 0.124383 0.125008
## 1 1 1 1 1 2 1 1
## 0.125013 0.125248 0.125621 0.125629 0.126237 0.126258 0.126548 0.126871
## 1 1 1 1 1 1 1 1
## 0.126883 0.1275 0.127839 0.128125 0.128733 0.129354 0.129796 0.129975
## 1 1 1 1 1 2 1 1
## 0.129979 0.129987 0.1306 0.131221 0.131225 0.131229 0.131846 0.132463
## 1 1 2 2 1 1 1 1
## 0.132467 0.133083 0.1331 0.133696 0.133721 0.134329 0.134337 0.134342
## 1 2 1 1 2 1 1 1
## 0.13495 0.134954 0.134958 0.135571 0.135583 0.136212 0.136817 0.136829
## 2 3 1 1 1 1 3 1
## 0.136926 0.137442 0.138054 0.138058 0.138067 0.138683 0.138692 0.139308
## 1 1 1 1 1 1 1 1
## 0.139929 0.14055 0.140554 0.141162 0.141179 0.141787 0.141796 0.1418
## 1 2 1 1 2 1 2 1
## 0.141804 0.142122 0.142404 0.142421 0.143029 0.143042 0.143667 0.143679
## 1 1 1 1 1 1 1 1
## 0.144283 0.144287 0.144904 0.145365 0.145525 0.146133 0.146142 0.146763
## 1 1 2 1 1 1 2 1
## 0.146767 0.146775 0.147379 0.147392 0.148008 0.148017 0.148021 0.148629
## 1 2 1 1 1 1 1 1
## 0.148642 0.149871 0.149879 0.149883 0.1505 0.151121 0.151733 0.151737
## 1 1 1 3 1 2 1 1
## 0.151742 0.152979 0.152987 0.152992 0.153608 0.153617 0.1538 0.154229
## 2 1 1 2 1 1 1 1
## 0.154233 0.154846 0.15485 0.155091 0.155471 0.155475 0.156096 0.1561
## 1 1 1 1 2 1 1 1
## 0.156717 0.157346 0.15735 0.157963 0.157971 0.157975 0.15833 0.1592
## 1 1 2 1 1 1 1 1
## 0.159825 0.160296 0.160446 0.16045 0.161071 0.161079 0.162312 0.162317
## 1 1 1 1 2 2 1 1
## 0.162937 0.162938 0.163554 0.163567 0.164179 0.164183 0.164187 0.164796
## 1 1 1 2 1 1 1 1
## 0.1648 0.164813 0.165417 0.165425 0.165429 0.166054 0.166658 0.166667
## 1 1 1 1 1 1 1 3
## 0.167283 0.1673 0.167304 0.167908 0.167912 0.168529 0.168533 0.168537
## 1 1 1 1 3 1 2 1
## 0.168726 0.169158 0.169171 0.169771 0.169779 0.170396 0.171025 0.171638
## 2 1 1 1 1 1 2 1
## 0.171646 0.17165 0.17197 0.172262 0.172267 0.172883 0.172888 0.172896
## 1 1 1 1 1 1 1 2
## 0.173513 0.173517 0.174129 0.174138 0.174746 0.174754 0.174758 0.175379
## 1 1 2 1 1 1 1 1
## 0.175383 0.175996 0.176 0.176617 0.176625 0.17725 0.177867 0.178479
## 1 1 1 2 1 1 1 1
## 0.178483 0.178496 0.179108 0.179117 0.179721 0.179725 0.179729 0.180967
## 1 1 1 1 1 1 1 1
## 0.180975 0.181596 0.1816 0.182213 0.182221 0.182833 0.182842 0.183454
## 2 1 1 1 1 1 1 1
## 0.183463 0.183471 0.184087 0.184092 0.1843 0.184309 0.184696 0.1847
## 1 1 1 1 1 1 1 1
## 0.185312 0.185325 0.185333 0.18595 0.186562 0.186571 0.1869 0.187183
## 1 1 1 1 1 1 1 1
## 0.187187 0.187192 0.187552 0.187808 0.187821 0.188433 0.18845 0.188839
## 1 2 1 2 1 1 1 1
## 0.189062 0.189067 0.189667 0.189675 0.189679 0.1903 0.190304 0.190308
## 1 2 1 1 1 1 1 1
## 0.190913 0.190917 0.190925 0.190929 0.191542 0.192167 0.192175 0.192748
## 1 1 1 1 1 1 2 1
## 0.192783 0.193417 0.194017 0.194029 0.194037 0.195267 0.195279 0.195683
## 1 1 1 1 1 1 1 1
## 0.195904 0.196521 0.197146 0.19715 0.197763 0.198992 0.199625 0.199633
## 1 1 1 1 1 1 1 1
## 0.199638 0.199642 0.200254 0.200258 0.200875 0.201487 0.201492 0.20275
## 1 1 1 2 1 1 1 1
## 0.203117 0.203346 0.203367 0.205229 0.205717 0.205846 0.20585 0.205854
## 1 1 1 1 1 1 1 1
## 0.206467 0.206471 0.206475 0.206479 0.207092 0.207713 0.207721 0.208317
## 1 1 1 1 2 2 1 1
## 0.208342 0.208954 0.208967 0.209571 0.209575 0.209579 0.210821 0.210829
## 1 2 1 2 1 1 1 1
## 0.210833 0.211454 0.212062 0.212204 0.212692 0.212696 0.213009 0.2133
## 1 2 1 1 1 1 1 1
## 0.213938 0.214546 0.214558 0.215171 0.215175 0.215792 0.215804 0.216412
## 1 1 1 1 1 2 1 1
## 0.216425 0.217646 0.219521 0.219529 0.22015 0.220154 0.220158 0.220775
## 1 1 1 1 1 1 2 2
## 0.221396 0.2214 0.221404 0.221935 0.222013 0.222021 0.222025 0.222587
## 1 1 1 1 1 1 1 1
## 0.222633 0.222642 0.223235 0.223258 0.223267 0.223883 0.224496 0.225117
## 1 1 1 1 1 1 2 1
## 0.225129 0.22575 0.225754 0.226375 0.226987 0.226992 0.226996 0.227604
## 2 2 1 1 1 1 1 1
## 0.227612 0.228246 0.22825 0.228858 0.229083 0.229475 0.229479 0.230092
## 1 1 1 3 1 1 1 1
## 0.230104 0.230721 0.230725 0.231017 0.231354 0.231358 0.232583 0.232596
## 1 1 2 1 1 1 1 1
## 0.23297 0.233204 0.233208 0.233221 0.233842 0.234261 0.234471 0.235067
## 1 1 1 1 1 1 1 1
## 0.235075 0.235092 0.235692 0.236321 0.236325 0.236329 0.236937 0.23695
## 1 1 1 2 1 1 2 1
## 0.237562 0.237563 0.237567 0.238804 0.238813 0.239465 0.24005 0.240058
## 1 1 1 1 1 1 1 1
## 0.240063 0.240667 0.240679 0.241925 0.243167 0.243339 0.243787 0.2444
## 1 1 1 1 1 1 1 1
## 0.244408 0.245033 0.2466 0.247521 0.24815 0.248309 0.248539 0.248754
## 1 1 1 1 1 1 1 1
## 0.249375 0.249383 0.250496 0.250617 0.251258 0.251791 0.251871 0.253108
## 1 1 1 2 1 1 1 1
## 0.253112 0.253121 0.253733 0.254367 0.257458 0.258083 0.258092 0.258708
## 1 1 1 1 1 1 1 1
## 0.258713 0.260575 0.260883 0.261817 0.261821 0.261877 0.263063 0.264308
## 1 1 1 1 1 1 1 1
## 0.264925 0.266175 0.266804 0.268025 0.268033 0.268042 0.269283 0.270529
## 1 2 1 1 1 1 1 1
## 0.270604 0.271146 0.271158 0.271775 0.271779 0.273629 0.274246 0.274871
## 1 1 1 1 1 1 1 1
## 0.274879 0.27675 0.277354 0.277752 0.278612 0.281104 0.281717 0.281721
## 1 1 1 1 1 1 1 1
## 0.282337 0.283583 0.283587 0.284813 0.284829 0.284833 0.288783 0.289686
## 1 1 1 1 1 1 1 1
## 0.289796 0.290421 0.290429 0.291374 0.291671 0.292287 0.292296 0.29385
## 1 1 1 1 1 1 1 1
## 0.293961 0.295274 0.295392 0.2954 0.296029 0.296037 0.300383 0.300388
## 1 1 1 1 1 2 1 1
## 0.301 0.303496 0.304108 0.304627 0.304659 0.30535 0.305362 0.306596
## 1 2 1 1 1 1 1 1
## 0.307833 0.307846 0.312139 0.3122 0.314063 0.314675 0.316546 0.31965
## 1 1 1 1 1 1 1 1
## 0.320908 0.324021 0.324474 0.325258 0.3265 0.328996 0.329665 0.334571
## 1 1 1 1 1 1 1 1
## 0.335825 0.340808 0.341352 0.342046 0.342667 0.343279 0.343287 0.343943
## 1 1 1 1 2 1 1 1
## 0.344546 0.345779 0.346539 0.347633 0.347642 0.347835 0.34913 0.350133
## 1 1 1 1 1 1 1 1
## 0.350754 0.351371 0.353242 0.357587 0.358196 0.3582 0.36195 0.365671
## 1 1 1 1 1 1 2 1
## 0.368167 0.374383 0.375617 0.376871 0.378108 0.385571 0.386821 0.388067
## 1 1 1 1 1 1 1 1
## 0.398008 0.407346 0.409212 0.4148 0.415429 0.417908 0.421642 0.422275
## 1 1 1 1 1 1 1 1
## 0.441563 0.507463
## 1 1
## Very Low Low Medium High Very High
## 4840.356 4828.333 4239.558 3385.597 3180.625
# Bar chart of the mean daily rental count (cnt) for each windspeed bucket.
# Columns are given to aes() by bare name so ggplot2 resolves them from `day`
# itself instead of from detached vectors (`day$...` inside aes() is
# discouraged). `fun` replaces `fun.y`, which was deprecated in ggplot2 3.3.0.
# `cleanup` is defined outside this excerpt (presumably a shared theme).
wind_count <- ggplot(day, aes(x = windbucket, y = cnt)) +
  geom_bar(stat = "summary", fun = "mean", position = position_dodge()) +
  xlab("Windspeed") +
  ylab("Rental Bikes") +
  ggtitle("Rental Bike VS Windspeed") +
  theme(plot.title = element_text(hjust = 0.5)) +
  cleanup
wind_count
# correlation (heatmap)
# Build the table of predictors used for the correlation analysis.
library(corrplot)
# Drop columns 1-2 and 14-25 by position; the printed head below shows the
# 11 remaining columns (season ... windspeed).
# NOTE(review): the object name `cor` is the same as the stats::cor function.
cor <- day[, -c(1, 2, 14:25)]
# Store month as a plain number.
cor[["mnth"]] <- as.numeric(cor[["mnth"]])
head(cor)
## # A tibble: 6 x 11
## season yr mnth holiday weekday workingday weathersit temp atemp hum
## <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 1 0 1 0 6 0 2 0.344 0.364 0.806
## 2 1 0 1 0 0 0 2 0.363 0.354 0.696
## 3 1 0 1 0 1 1 1 0.196 0.189 0.437
## 4 1 0 1 0 2 1 1 0.2 0.212 0.590
## 5 1 0 1 0 3 1 1 0.227 0.229 0.437
## 6 1 0 1 0 4 1 1 0.204 0.233 0.518
## # … with 1 more variable: windspeed <dbl>
## Classes 'tbl_df', 'tbl' and 'data.frame': 731 obs. of 11 variables:
## $ season : num 1 1 1 1 1 1 1 1 1 1 ...
## $ yr : num 0 0 0 0 0 0 0 0 0 0 ...
## $ mnth : num 1 1 1 1 1 1 1 1 1 1 ...
## $ holiday : num 0 0 0 0 0 0 0 0 0 0 ...
## $ weekday : num 6 0 1 2 3 4 5 6 0 1 ...
## $ workingday: num 0 0 1 1 1 1 1 0 0 1 ...
## $ weathersit: num 2 2 1 1 1 1 2 2 1 1 ...
## $ temp : num 0.344 0.363 0.196 0.2 0.227 ...
## $ atemp : num 0.364 0.354 0.189 0.212 0.229 ...
## $ hum : num 0.806 0.696 0.437 0.59 0.437 ...
## $ windspeed : num 0.16 0.249 0.248 0.16 0.187 ...
# Q1: Correlation between different variables.
## season VS weathersit
# Correlation test (cor.test default method) between weather situation
# and season; the result is stored and then printed.
cor.sws <- cor.test(x = day$weathersit, y = day$season)
cor.sws
##
## Pearson's product-moment correlation
##
## data: day$weathersit and day$season
## t = 0.51879, df = 729, p-value = 0.6041
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.05337693 0.09159703
## sample estimates:
## cor
## 0.01921103
##
## Pearson's product-moment correlation
##
## data: day$temp and day$season
## t = 9.5776, df = 729, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.2683056 0.3971994
## sample estimates:
## cor
## 0.3343149
##
## Pearson's product-moment correlation
##
## data: day$hum and day$season
## t = 5.6679, df = 729, p-value = 2.083e-08
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.1349415 0.2738783
## sample estimates:
## cor
## 0.2054448
##
## Pearson's product-moment correlation
##
## data: day$windspeed and day$season
## t = -6.3531, df = 729, p-value = 3.714e-10
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.2966332 -0.1591765
## sample estimates:
## cor
## -0.2290463
##
## Pearson's product-moment correlation
##
## data: day$hum and day$windspeed
## t = -6.9265, df = 729, p-value = 9.488e-12
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.3153210 -0.1792046
## sample estimates:
## cor
## -0.2484891
## season VS month
# Work on a copy so that converting mnth to numeric leaves `day` untouched.
day1 <- day
day1[["mnth"]] <- as.numeric(day1[["mnth"]])
# Correlation test between season and the numeric month.
cor.sm <- cor.test(x = day1$season, y = day1$mnth)
cor.sm
##
## Pearson's product-moment correlation
##
## data: day1$season and day1$mnth
## t = 40.404, df = 729, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.8076184 0.8525527
## sample estimates:
## cor
## 0.8314401
##
## Pearson's product-moment correlation
##
## data: day$weathersit and day$hum
## t = 19.784, df = 729, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.5417497 0.6362877
## sample estimates:
## cor
## 0.5910446
##
## Pearson's product-moment correlation
##
## data: day$hum and day1$mnth
## t = 6.1533, df = 729, p-value = 1.251e-09
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.1521415 0.2900439
## sample estimates:
## cor
## 0.2222037
##
## Pearson's product-moment correlation
##
## data: day$holiday and day$workingday
## t = -7.0614, df = 729, p-value = 3.851e-12
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.3196711 -0.1838829
## sample estimates:
## cor
## -0.2530227
# Q2: Is temperature associated with total bike rentals?
# One-way ANOVA: daily rental count (cnt) by temperature bucket.
temp.anv <- aov(formula = cnt ~ tempbucket, data = day)
summary(temp.anv)
## Df Sum Sq Mean Sq F value Pr(>F)
## tempbucket 4 1.102e+09 275540185 122.2 <2e-16 ***
## Residuals 726 1.637e+09 2255337
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Based on the p-value (< 0.05), we reject the null hypothesis: average daily bike rentals differ significantly across temperature ranges. Because the ANOVA test is significant, we can compute Tukey's HSD to perform multiple comparisons between the mean rentals of the temperature buckets and identify which pairs of buckets differ significantly.
# Tukey HSD pairwise comparisons between the temperature buckets.
TukeyHSD(x = temp.anv)
## Tukey multiple comparisons of means
## 95% family-wise confidence level
##
## Fit: aov(formula = cnt ~ tempbucket, data = day)
##
## $tempbucket
## diff lwr upr p adj
## Low-Very Low 1635.3110 880.0890 2390.5329 0.0000000
## Medium-Very Low 3467.9730 2710.2501 4225.6959 0.0000000
## High-Very Low 4171.1050 3418.1600 4924.0500 0.0000000
## Very High-Very Low 3222.0772 1977.0114 4467.1430 0.0000000
## Medium-Low 1832.6621 1442.2989 2223.0252 0.0000000
## High-Low 2535.7941 2154.7881 2916.8001 0.0000000
## Very High-Low 1586.7662 524.4913 2649.0412 0.0004696
## High-Medium 703.1320 317.1924 1089.0716 0.0000078
## Very High-Medium -245.8958 -1309.9503 818.1586 0.9698722
## Very High-High -949.0278 -2009.6852 111.6296 0.1042759
### The output indicates that the Very High-High and Very High-Medium differences are not significant (adjusted p-value > 0.05); all other pairwise differences are significant.
# Q3: Whether the bike rental on Holiday is different than that on nonholiday?
### H0: mu_Not Holiday - mu_Holiday = 0
### HA: mu_Not Holiday - mu_Holiday != 0
# Number of days in each holiday category.
table(day[["hol"]])
##
## Not Holiday Holiday
## 710 21
## Loading required package: BayesFactor
## Loading required package: coda
## Loading required package: Matrix
## ************
## Welcome to BayesFactor 0.9.12-4.2. If you have questions, please contact Richard Morey (richarddmorey@gmail.com).
##
## Type BFManual() to open the manual.
## ************
## Response variable: numerical
## Explanatory variable: categorical (2 levels)
## n_Not Holiday = 710, y_bar_Not Holiday = 4527.1042, s_Not Holiday = 1929.0139
## n_Holiday = 21, y_bar_Holiday = 3735, s_Holiday = 2103.3507
## H0: mu_Not Holiday = mu_Holiday
## HA: mu_Not Holiday != mu_Holiday
## t = 1.7047, df = 20
## p_value = 0.1037
## Based on the p-value = 0.1037, which is larger than 0.05, we fail to reject the null hypothesis. There is not enough evidence that the average bike rental on holidays differs from that on non-holidays.
# Q4: Whether the bike rental on different season is different?
# One-way ANOVA: daily rental count (cnt) by season.
season.anv <- aov(formula = cnt ~ seasoning, data = day)
summary(season.anv)
## Df Sum Sq Mean Sq F value Pr(>F)
## seasoning 3 9.506e+08 316865289 128.8 <2e-16 ***
## Residuals 727 1.789e+09 2460715
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Based on the p-value (< 0.05), we reject the null hypothesis: average daily bike rentals differ significantly between seasons. Because the ANOVA test is significant, we can compute Tukey's HSD to perform multiple comparisons between the seasonal means and identify which pairs of seasons differ significantly.
# Tukey HSD pairwise comparisons between the seasons.
TukeyHSD(x = season.anv)
## Tukey multiple comparisons of means
## 95% family-wise confidence level
##
## Fit: aov(formula = cnt ~ seasoning, data = day)
##
## $seasoning
## diff lwr upr p adj
## Spring-Winter 2388.1989 1965.3325 2811.0653 0.0000000
## Summer-Winter 3040.1706 2619.5409 3460.8003 0.0000000
## Fall-Winter 2124.0303 1697.6444 2550.4163 0.0000000
## Summer-Spring 651.9717 233.0927 1070.8507 0.0003925
## Fall-Spring -264.1686 -688.8276 160.4904 0.3781913
## Fall-Summer -916.1403 -1338.5720 -493.7085 0.0000002
## The output indicates that the Fall-Spring difference is not significant. Thus, the significant differences are between winter and each of spring, summer, and fall, and between summer and each of spring and fall.
# Q5. Whether the bike rental on different months are different?
# One-way ANOVA: daily rental count (cnt) by month.
# The month term is written as the bare column name so it is looked up in
# `data = day`, consistent with the other aov() calls in this file. The
# previous `day$mnth` inside the formula bypassed `data` and labelled the
# term "day$mnth" in the summary; the fitted values are unchanged.
mnth.anv <- aov(cnt ~ mnth, data = day)
summary(mnth.anv)
## Df Sum Sq Mean Sq F value Pr(>F)
## mnth 11 1.070e+09 97290206 41.9 <2e-16 ***
## Residuals 719 1.669e+09 2321757
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Based on the p-value (< 0.05), we reject the null hypothesis. There are significant differences among months in bike rental count.
# Q6: Whether the bike rental on working day is greater than that on nonworking day?
### H0: mu_Not Working Day - mu_Working Day = 0
### HA: mu_Not Working Day - mu_Working Day < 0
# Number of days in each working-day category.
table(day[["working"]])
##
## Not Working Day Working Day
## 231 500
# One-sided hypothesis test on the difference in mean cnt between the two
# `working` groups, with groups ordered as (Not Working Day, Working Day)
# and alternative "less", i.e. HA: mu_Not Working Day < mu_Working Day.
library(statsr)
inference(
  y = cnt,
  x = working,
  data = day,
  type = "ht",
  statistic = "mean",
  method = "theoretical",
  null = 0,
  alternative = "less",
  order = c("Not Working Day", "Working Day"),
  conf_level = 0.9
)
## Response variable: numerical
## Explanatory variable: categorical (2 levels)
## n_Not Working Day = 231, y_bar_Not Working Day = 4330.1688, s_Not Working Day = 2052.1412
## n_Working Day = 500, y_bar_Working Day = 4584.82, s_Working Day = 1878.4156
## H0: mu_Not Working Day = mu_Working Day
## HA: mu_Not Working Day < mu_Working Day
## t = -1.6014, df = 230
## p_value = 0.0553
## Based on the p-value of 0.0553, which is greater than 0.05, we fail to reject the null hypothesis. There is no significant evidence that bike rentals on working days are greater than on non-working days.
# Q7. Whether the bike rental on different weekdays is significantly different?
# One-way ANOVA: daily rental count (cnt) by day of the week.
wd.anv <- aov(formula = cnt ~ wd, data = day)
summary(wd.anv)
## Df Sum Sq Mean Sq F value Pr(>F)
## wd 6 1.766e+07 2943170 0.783 0.583
## Residuals 724 2.722e+09 3759498
## Warning in summary.lm(actual): essentially perfect fit: summary may be
## unreliable
##
## Call:
## lm(formula = cnt ~ ., data = day)
##
## Residuals:
## Min 1Q Median 3Q Max
## -6.083e-11 -4.270e-13 -3.000e-14 4.150e-13 4.238e-11
##
## Coefficients: (8 not defined because of singularities)
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -2.134e-12 1.500e-12 -1.423e+00 0.15511
## instant -8.777e-16 1.329e-14 -6.600e-02 0.94736
## dteday NA NA NA NA
## season 5.533e-14 2.694e-13 2.050e-01 0.83731
## yr -1.582e-12 4.909e-12 -3.220e-01 0.74725
## mnth2 1.468e-13 7.235e-13 2.030e-01 0.83923
## mnth3 -1.428e-12 1.059e-12 -1.349e+00 0.17791
## mnth4 -2.677e-12 1.562e-12 -1.714e+00 0.08703 .
## mnth5 -2.323e-12 1.925e-12 -1.207e+00 0.22791
## mnth6 -5.732e-12 2.292e-12 -2.501e+00 0.01261 *
## mnth7 -4.848e-12 2.697e-12 -1.798e+00 0.07268 .
## mnth8 -5.073e-12 3.054e-12 -1.661e+00 0.09717 .
## mnth9 -3.818e-12 3.389e-12 -1.126e+00 0.26038
## mnth10 -3.547e-12 3.792e-12 -9.350e-01 0.34997
## mnth11 -3.050e-12 4.169e-12 -7.320e-01 0.46464
## mnth12 -2.842e-12 4.513e-12 -6.300e-01 0.52904
## holiday 3.279e-13 8.316e-13 3.940e-01 0.69344
## weekday -2.238e-13 7.305e-14 -3.063e+00 0.00228 **
## workingday 1.580e-12 5.217e-13 3.028e+00 0.00255 **
## weathersit 1.383e-12 4.721e-13 2.929e+00 0.00351 **
## temp 1.178e-11 6.281e-12 1.876e+00 0.06114 .
## atemp -4.200e-12 6.176e-12 -6.800e-01 0.49675
## hum 1.735e-12 2.452e-12 7.080e-01 0.47927
## windspeed -1.476e-12 4.642e-12 -3.180e-01 0.75064
## casual 1.000e+00 3.526e-16 2.836e+15 < 2e-16 ***
## registered 1.000e+00 2.085e-16 4.797e+15 < 2e-16 ***
## seasoningSpring 1.062e-13 7.006e-13 1.520e-01 0.87959
## seasoningSummer -1.518e-14 7.338e-13 -2.100e-02 0.98350
## seasoningFall NA NA NA NA
## year2012 NA NA NA NA
## holHoliday NA NA NA NA
## wdMonday -7.558e-13 5.297e-13 -1.427e+00 0.15405
## wdTuesday -7.001e-13 4.899e-13 -1.429e+00 0.15344
## wdWednesday -2.964e-13 4.657e-13 -6.360e-01 0.52471
## wdThursday -8.658e-14 4.459e-13 -1.940e-01 0.84610
## wdFriday NA NA NA NA
## wdSaturday NA NA NA NA
## workingWorking Day NA NA NA NA
## weatherCloudy -1.936e-13 4.414e-13 -4.390e-01 0.66112
## weatherWet NA NA NA NA
## tempbucketLow -8.291e-13 7.549e-13 -1.098e+00 0.27248
## tempbucketMedium -9.331e-13 1.081e-12 -8.640e-01 0.38814
## tempbucketHigh -1.217e-12 1.390e-12 -8.760e-01 0.38139
## tempbucketVery High -1.033e-12 1.827e-12 -5.660e-01 0.57183
## humbucket0.4-0.6 -5.317e-13 7.379e-13 -7.210e-01 0.47141
## humbucket0.6-0.8 -8.779e-13 9.996e-13 -8.780e-01 0.38012
## humbucket0.8-1.0 -2.625e-12 1.341e-12 -1.958e+00 0.05069 .
## windbucketLow -1.740e-13 5.422e-13 -3.210e-01 0.74831
## windbucketMedium 2.412e-13 8.657e-13 2.790e-01 0.78058
## windbucketHigh 3.773e-14 1.321e-12 2.900e-02 0.97722
## windbucketVery High 2.852e-14 1.961e-12 1.500e-02 0.98840
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 3.096e-12 on 686 degrees of freedom
## (2 observations deleted due to missingness)
## Multiple R-squared: 1, Adjusted R-squared: 1
## F-statistic: 6.747e+30 on 42 and 686 DF, p-value: < 2.2e-16
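## t value: the higher (in absolute value) the better
## F-statistic: the higher the better
## R-squared and adjusted R-squared: the higher the better
## Std. error: the closer to zero the better
## AIC: the lower the better
## BIC: the lower the better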
# Then, splitting the observations into 80% for traindata and 20% for testdata
# Randomly assign 80% of the rows of `day` (rounded down) to the training
# set and the remaining rows to the test set.
# NOTE(review): no set.seed() call is visible in this excerpt; confirm one is
# made earlier, otherwise the split (and every model below) changes per run.
# sample.int() draws the same indices as sample(1:nrow(day), ...) but is also
# safe for a zero-row table; floor() makes the size truncation explicit.
trainrow <- sample.int(nrow(day), size = floor(0.8 * nrow(day)))
traindata <- day[trainrow, ]
# setdiff() instead of negative indexing: day[-integer(0), ] would return
# zero rows rather than all rows if the training sample were empty.
testdata <- day[setdiff(seq_len(nrow(day)), trainrow), ]
# For traindata set
# Baseline linear model on the training set: cnt explained by yr only.
lmmod <- lm(formula = cnt ~ yr, data = traindata)
summary(lmmod)
##
## Call:
## lm(formula = cnt ~ yr, data = traindata)
##
## Residuals:
## Min 1Q Median 3Q Max
## -5608.2 -1268.2 321.7 1252.9 3083.8
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 3423.82 94.08 36.39 <2e-16 ***
## yr 2206.42 131.27 16.81 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1586 on 582 degrees of freedom
## Multiple R-squared: 0.3268, Adjusted R-squared: 0.3256
## F-statistic: 282.5 on 1 and 582 DF, p-value: < 2.2e-16
##
## Call:
## lm(formula = cnt ~ yr + mnth, data = traindata)
##
## Residuals:
## Min 1Q Median 3Q Max
## -6195.3 -476.8 159.8 665.1 3568.1
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1104.15 164.60 6.708 4.76e-11 ***
## yr 2198.32 88.47 24.849 < 2e-16 ***
## mnth2 412.40 223.38 1.846 0.0654 .
## mnth3 1491.43 221.22 6.742 3.84e-11 ***
## mnth4 2419.65 223.43 10.829 < 2e-16 ***
## mnth5 3143.18 218.93 14.357 < 2e-16 ***
## mnth6 3514.70 222.33 15.808 < 2e-16 ***
## mnth7 3378.06 216.93 15.572 < 2e-16 ***
## mnth8 3505.09 218.96 16.008 < 2e-16 ***
## mnth9 3447.92 218.93 15.749 < 2e-16 ***
## mnth10 2914.80 215.97 13.496 < 2e-16 ***
## mnth11 1948.33 225.88 8.626 < 2e-16 ***
## mnth12 1309.06 215.97 6.061 2.45e-09 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1065 on 571 degrees of freedom
## Multiple R-squared: 0.7018, Adjusted R-squared: 0.6955
## F-statistic: 112 on 12 and 571 DF, p-value: < 2.2e-16
##
## Call:
## lm(formula = cnt ~ yr + mnth + windspeed, data = traindata)
##
## Residuals:
## Min 1Q Median 3Q Max
## -5475.2 -513.2 155.3 643.2 3195.1
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1961.39 203.86 9.621 < 2e-16 ***
## yr 2191.46 85.26 25.702 < 2e-16 ***
## mnth2 407.83 215.27 1.894 0.0587 .
## mnth3 1462.35 213.24 6.858 1.82e-11 ***
## mnth4 2477.12 215.49 11.495 < 2e-16 ***
## mnth5 3003.50 212.02 14.166 < 2e-16 ***
## mnth6 3391.75 215.05 15.772 < 2e-16 ***
## mnth7 3161.99 211.53 14.948 < 2e-16 ***
## mnth8 3298.10 213.27 15.465 < 2e-16 ***
## mnth9 3228.15 213.52 15.119 < 2e-16 ***
## mnth10 2725.09 210.05 12.973 < 2e-16 ***
## mnth11 1790.61 218.95 8.178 1.88e-15 ***
## mnth12 1151.76 209.45 5.499 5.78e-08 ***
## windspeed -3854.69 575.76 -6.695 5.18e-11 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1027 on 570 degrees of freedom
## Multiple R-squared: 0.7235, Adjusted R-squared: 0.7172
## F-statistic: 114.7 on 13 and 570 DF, p-value: < 2.2e-16
##
## Call:
## lm(formula = cnt ~ yr + mnth + windspeed + hum, data = traindata)
##
## Residuals:
## Min 1Q Median 3Q Max
## -4826.9 -489.2 135.4 602.8 3407.1
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 3593.95 279.92 12.839 < 2e-16 ***
## yr 2104.14 81.57 25.797 < 2e-16 ***
## mnth2 466.95 204.25 2.286 0.0226 *
## mnth3 1548.36 202.47 7.647 8.79e-14 ***
## mnth4 2549.07 204.52 12.464 < 2e-16 ***
## mnth5 3279.85 203.93 16.083 < 2e-16 ***
## mnth6 3445.25 204.01 16.888 < 2e-16 ***
## mnth7 3234.08 200.77 16.109 < 2e-16 ***
## mnth8 3464.55 203.27 17.044 < 2e-16 ***
## mnth9 3609.10 207.90 17.360 < 2e-16 ***
## mnth10 3055.65 203.34 15.027 < 2e-16 ***
## mnth11 1962.02 208.69 9.402 < 2e-16 ***
## mnth12 1393.71 200.85 6.939 1.08e-11 ***
## windspeed -4743.62 556.94 -8.517 < 2e-16 ***
## hum -2506.40 310.83 -8.064 4.40e-15 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 973.5 on 569 degrees of freedom
## Multiple R-squared: 0.7519, Adjusted R-squared: 0.7458
## F-statistic: 123.2 on 14 and 569 DF, p-value: < 2.2e-16
##
## Call:
## lm(formula = cnt ~ yr + mnth + windspeed + hum + temp, data = traindata)
##
## Residuals:
## Min 1Q Median 3Q Max
## -4454.8 -369.1 118.3 580.2 2552.1
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2721.72 269.31 10.106 < 2e-16 ***
## yr 1994.76 75.39 26.459 < 2e-16 ***
## mnth2 119.58 189.87 0.630 0.529080
## mnth3 699.83 202.11 3.463 0.000575 ***
## mnth4 1274.47 222.94 5.717 1.76e-08 ***
## mnth5 1446.49 255.26 5.667 2.32e-08 ***
## mnth6 1094.19 291.06 3.759 0.000188 ***
## mnth7 514.44 316.96 1.623 0.105139
## mnth8 998.95 299.06 3.340 0.000892 ***
## mnth9 1660.57 265.43 6.256 7.77e-10 ***
## mnth10 1821.27 219.98 8.279 8.90e-16 ***
## mnth11 1257.98 202.41 6.215 9.93e-10 ***
## mnth12 965.67 188.31 5.128 4.02e-07 ***
## windspeed -4647.50 509.95 -9.114 < 2e-16 ***
## hum -3101.09 290.11 -10.689 < 2e-16 ***
## temp 5301.75 503.41 10.532 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 891.2 on 568 degrees of freedom
## Multiple R-squared: 0.7924, Adjusted R-squared: 0.7869
## F-statistic: 144.6 on 15 and 568 DF, p-value: < 2.2e-16
##
## Call:
## lm(formula = cnt ~ yr + mnth + windspeed + hum + temp + workingday,
## data = traindata)
##
## Residuals:
## Min 1Q Median 3Q Max
## -4508.7 -376.2 98.8 557.5 2688.2
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2635.17 271.60 9.703 < 2e-16 ***
## yr 1998.53 75.18 26.582 < 2e-16 ***
## mnth2 120.61 189.30 0.637 0.524278
## mnth3 702.96 201.50 3.489 0.000523 ***
## mnth4 1297.08 222.52 5.829 9.36e-09 ***
## mnth5 1469.05 254.71 5.768 1.32e-08 ***
## mnth6 1114.12 290.33 3.837 0.000138 ***
## mnth7 560.12 316.74 1.768 0.077535 .
## mnth8 1019.87 298.32 3.419 0.000674 ***
## mnth9 1686.52 264.91 6.366 4.00e-10 ***
## mnth10 1839.15 219.47 8.380 4.19e-16 ***
## mnth11 1264.79 201.82 6.267 7.29e-10 ***
## mnth12 976.49 187.81 5.199 2.80e-07 ***
## windspeed -4647.40 508.40 -9.141 < 2e-16 ***
## hum -3120.22 289.37 -10.783 < 2e-16 ***
## temp 5220.01 503.37 10.370 < 2e-16 ***
## workingday 171.53 81.19 2.113 0.035065 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 888.5 on 567 degrees of freedom
## Multiple R-squared: 0.794, Adjusted R-squared: 0.7882
## F-statistic: 136.6 on 16 and 567 DF, p-value: < 2.2e-16
## $Models
## Formula
## 1 "cnt ~ yr"
## 2 "cnt ~ yr + mnth"
## 3 "cnt ~ yr + mnth + windspeed"
## 4 "cnt ~ yr + mnth + windspeed + hum"
## 5 "cnt ~ yr + mnth + windspeed + hum + temp"
## 6 "cnt ~ yr + mnth + windspeed + hum + temp + workingday"
##
## $Fit.criteria
## Rank Df.res AIC AICc BIC R.squared Adj.R.sq p.value Shapiro.W
## 1 2 582 10270 10270 10280 0.3268 0.3256 5.648e-52 0.9558
## 2 13 571 9814 9815 9876 0.7018 0.6955 2.705e-141 0.9263
## 3 14 570 9772 9773 9838 0.7235 0.7172 1.478e-149 0.9423
## 4 15 569 9711 9712 9781 0.7519 0.7458 8.691e-162 0.9474
## 5 16 568 9609 9610 9683 0.7924 0.7869 1.378e-182 0.9414
## 6 17 567 9606 9607 9685 0.7940 0.7882 1.852e-182 0.9433
## Shapiro.p
## 1 3.099e-12
## 2 2.538e-16
## 3 2.748e-14
## 4 1.487e-13
## 5 2.061e-14
## 6 3.883e-14
##
## Call:
## lm(formula = cnt ~ yr + mnth + windspeed + hum + temp + workingday,
## data = traindata)
##
## Residuals:
## Min 1Q Median 3Q Max
## -4508.7 -376.2 98.8 557.5 2688.2
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2635.17 271.60 9.703 < 2e-16 ***
## yr 1998.53 75.18 26.582 < 2e-16 ***
## mnth2 120.61 189.30 0.637 0.524278
## mnth3 702.96 201.50 3.489 0.000523 ***
## mnth4 1297.08 222.52 5.829 9.36e-09 ***
## mnth5 1469.05 254.71 5.768 1.32e-08 ***
## mnth6 1114.12 290.33 3.837 0.000138 ***
## mnth7 560.12 316.74 1.768 0.077535 .
## mnth8 1019.87 298.32 3.419 0.000674 ***
## mnth9 1686.52 264.91 6.366 4.00e-10 ***
## mnth10 1839.15 219.47 8.380 4.19e-16 ***
## mnth11 1264.79 201.82 6.267 7.29e-10 ***
## mnth12 976.49 187.81 5.199 2.80e-07 ***
## windspeed -4647.40 508.40 -9.141 < 2e-16 ***
## hum -3120.22 289.37 -10.783 < 2e-16 ***
## temp 5220.01 503.37 10.370 < 2e-16 ***
## workingday 171.53 81.19 2.113 0.035065 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 888.5 on 567 degrees of freedom
## Multiple R-squared: 0.794, Adjusted R-squared: 0.7882
## F-statistic: 136.6 on 16 and 567 DF, p-value: < 2.2e-16
##
## Call:
## lm(formula = cnt ~ yr, data = testdata)
##
## Residuals:
## Min 1Q Median 3Q Max
## -4542.2 -1195.4 306.6 1285.1 3092.8
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 3342.4 183.3 18.234 < 2e-16 ***
## yr 2119.8 273.6 7.749 1.48e-12 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1650 on 145 degrees of freedom
## Multiple R-squared: 0.2928, Adjusted R-squared: 0.2879
## F-statistic: 60.04 on 1 and 145 DF, p-value: 1.484e-12
##
## Call:
## lm(formula = cnt ~ yr + mnth, data = testdata)
##
## Residuals:
## Min 1Q Median 3Q Max
## -3289.4 -394.7 20.8 475.4 2141.0
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1041.4 245.3 4.246 4.04e-05 ***
## yr 2131.0 164.5 12.951 < 2e-16 ***
## mnth2 617.0 365.8 1.687 0.0940 .
## mnth3 1557.6 345.1 4.514 1.38e-05 ***
## mnth4 1914.7 342.3 5.593 1.21e-07 ***
## mnth5 3250.9 356.9 9.109 1.04e-15 ***
## mnth6 3843.8 351.7 10.928 < 2e-16 ***
## mnth7 3378.2 377.8 8.942 2.69e-15 ***
## mnth8 3363.0 357.9 9.397 < 2e-16 ***
## mnth9 4249.5 377.8 11.249 < 2e-16 ***
## mnth10 3566.3 389.9 9.146 8.47e-16 ***
## mnth11 2407.2 329.3 7.310 2.18e-11 ***
## mnth12 653.8 389.9 1.677 0.0959 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 945.3 on 134 degrees of freedom
## Multiple R-squared: 0.7854, Adjusted R-squared: 0.7662
## F-statistic: 40.87 on 12 and 134 DF, p-value: < 2.2e-16
##
## Call:
## lm(formula = cnt ~ yr + mnth + windspeed, data = testdata)
##
## Residuals:
## Min 1Q Median 3Q Max
## -3034.09 -401.32 36.03 576.86 2208.72
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1320.3 302.2 4.368 2.50e-05 ***
## yr 2132.4 163.7 13.029 < 2e-16 ***
## mnth2 668.1 365.3 1.829 0.0697 .
## mnth3 1704.9 355.9 4.790 4.39e-06 ***
## mnth4 2020.1 347.1 5.819 4.19e-08 ***
## mnth5 3263.9 355.1 9.192 6.86e-16 ***
## mnth6 3852.3 349.9 11.009 < 2e-16 ***
## mnth7 3382.8 375.8 9.002 2.01e-15 ***
## mnth8 3411.3 357.3 9.547 < 2e-16 ***
## mnth9 4260.8 375.8 11.337 < 2e-16 ***
## mnth10 3611.0 388.9 9.285 4.04e-16 ***
## mnth11 2451.8 328.8 7.457 1.02e-11 ***
## mnth12 631.4 388.1 1.627 0.1061
## windspeed -1684.3 1077.5 -1.563 0.1204
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 940.3 on 133 degrees of freedom
## Multiple R-squared: 0.7893, Adjusted R-squared: 0.7687
## F-statistic: 38.32 on 13 and 133 DF, p-value: < 2.2e-16
##
## Call:
## lm(formula = cnt ~ yr + mnth + windspeed + hum, data = testdata)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2747.43 -443.93 -28.09 379.06 2473.29
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 3285.3 525.8 6.248 5.31e-09 ***
## yr 2088.0 153.6 13.594 < 2e-16 ***
## mnth2 302.9 351.9 0.861 0.3909
## mnth3 1560.3 334.9 4.659 7.67e-06 ***
## mnth4 1960.7 325.3 6.027 1.57e-08 ***
## mnth5 3453.7 335.3 10.301 < 2e-16 ***
## mnth6 3510.3 336.6 10.428 < 2e-16 ***
## mnth7 3108.7 357.3 8.701 1.15e-14 ***
## mnth8 3336.9 335.0 9.960 < 2e-16 ***
## mnth9 4216.4 352.1 11.975 < 2e-16 ***
## mnth10 3541.2 364.5 9.714 < 2e-16 ***
## mnth11 2266.3 310.7 7.293 2.50e-11 ***
## mnth12 616.0 363.5 1.695 0.0925 .
## windspeed -2657.6 1032.7 -2.574 0.0112 *
## hum -2674.8 603.2 -4.434 1.93e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 880.5 on 132 degrees of freedom
## Multiple R-squared: 0.8166, Adjusted R-squared: 0.7972
## F-statistic: 41.98 on 14 and 132 DF, p-value: < 2.2e-16
##
## Call:
## lm(formula = cnt ~ yr + mnth + windspeed + hum + temp, data = testdata)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2406.14 -298.26 -10.78 408.31 1780.92
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2200.48 526.87 4.177 5.37e-05 ***
## yr 1975.11 142.54 13.856 < 2e-16 ***
## mnth2 68.25 325.86 0.209 0.83443
## mnth3 946.21 329.79 2.869 0.00480 **
## mnth4 886.52 365.08 2.428 0.01653 *
## mnth5 1549.47 483.46 3.205 0.00170 **
## mnth6 1065.97 569.82 1.871 0.06362 .
## mnth7 273.73 644.95 0.424 0.67195
## mnth8 855.46 575.17 1.487 0.13934
## mnth9 2344.44 488.65 4.798 4.30e-06 ***
## mnth10 2216.32 423.22 5.237 6.34e-07 ***
## mnth11 1707.78 305.19 5.596 1.23e-07 ***
## mnth12 276.70 339.81 0.814 0.41697
## windspeed -3304.90 955.23 -3.460 0.00073 ***
## hum -2782.14 553.44 -5.027 1.60e-06 ***
## temp 5385.24 1055.33 5.103 1.15e-06 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 807.3 on 131 degrees of freedom
## Multiple R-squared: 0.847, Adjusted R-squared: 0.8295
## F-statistic: 48.35 on 15 and 131 DF, p-value: < 2.2e-16
##
## Call:
## lm(formula = cnt ~ yr + mnth + windspeed + hum + temp + workingday,
## data = testdata)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2386.22 -327.65 -4.84 429.71 1842.87
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2027.67 540.11 3.754 0.000261 ***
## yr 1967.83 142.17 13.841 < 2e-16 ***
## mnth2 70.49 324.79 0.217 0.828514
## mnth3 922.29 329.17 2.802 0.005858 **
## mnth4 866.86 364.15 2.380 0.018741 *
## mnth5 1558.24 481.90 3.233 0.001550 **
## mnth6 1103.00 568.58 1.940 0.054555 .
## mnth7 285.69 642.87 0.444 0.657495
## mnth8 874.96 573.44 1.526 0.129490
## mnth9 2398.06 488.61 4.908 2.71e-06 ***
## mnth10 2232.76 421.99 5.291 5.01e-07 ***
## mnth11 1721.46 304.35 5.656 9.39e-08 ***
## mnth12 265.20 338.79 0.783 0.435172
## windspeed -3194.17 955.50 -3.343 0.001083 **
## hum -2696.31 555.16 -4.857 3.37e-06 ***
## temp 5341.36 1052.33 5.076 1.31e-06 ***
## workingday 190.33 139.13 1.368 0.173676
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 804.6 on 130 degrees of freedom
## Multiple R-squared: 0.8492, Adjusted R-squared: 0.8306
## F-statistic: 45.75 on 16 and 130 DF, p-value: < 2.2e-16
## $Models
## Formula
## 1 "cnt ~ yr"
## 2 "cnt ~ yr + mnth"
## 3 "cnt ~ yr + mnth + windspeed"
## 4 "cnt ~ yr + mnth + windspeed + hum"
## 5 "cnt ~ yr + mnth + windspeed + hum + temp"
## 6 "cnt ~ yr + mnth + windspeed + hum + temp + workingday"
##
## $Fit.criteria
## Rank Df.res AIC AICc BIC R.squared Adj.R.sq p.value Shapiro.W Shapiro.p
## 1 2 145 2599 2599 2608 0.2928 0.2879 1.484e-12 0.9650 8.402e-04
## 2 13 134 2446 2449 2488 0.7854 0.7662 7.023e-39 0.9510 4.773e-05
## 3 14 133 2445 2449 2490 0.7893 0.7687 1.409e-38 0.9593 2.470e-04
## 4 15 132 2427 2431 2475 0.8166 0.7972 1.142e-41 0.9633 5.857e-04
## 5 16 131 2402 2407 2453 0.8470 0.8295 6.647e-46 0.9493 3.463e-05
## 6 17 130 2402 2407 2456 0.8492 0.8306 1.896e-45 0.9545 9.362e-05
##
## Call:
## lm(formula = cnt ~ yr + mnth + windspeed + hum + temp + workingday,
## data = testdata)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2386.22 -327.65 -4.84 429.71 1842.87
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2027.67 540.11 3.754 0.000261 ***
## yr 1967.83 142.17 13.841 < 2e-16 ***
## mnth2 70.49 324.79 0.217 0.828514
## mnth3 922.29 329.17 2.802 0.005858 **
## mnth4 866.86 364.15 2.380 0.018741 *
## mnth5 1558.24 481.90 3.233 0.001550 **
## mnth6 1103.00 568.58 1.940 0.054555 .
## mnth7 285.69 642.87 0.444 0.657495
## mnth8 874.96 573.44 1.526 0.129490
## mnth9 2398.06 488.61 4.908 2.71e-06 ***
## mnth10 2232.76 421.99 5.291 5.01e-07 ***
## mnth11 1721.46 304.35 5.656 9.39e-08 ***
## mnth12 265.20 338.79 0.783 0.435172
## windspeed -3194.17 955.50 -3.343 0.001083 **
## hum -2696.31 555.16 -4.857 3.37e-06 ***
## temp 5341.36 1052.33 5.076 1.31e-06 ***
## workingday 190.33 139.13 1.368 0.173676
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 804.6 on 130 degrees of freedom
## Multiple R-squared: 0.8492, Adjusted R-squared: 0.8306
## F-statistic: 45.75 on 16 and 130 DF, p-value: < 2.2e-16
# Fit the selected linear model on traindata, then predict cnt for testdata.
lmmod <- lm(
  cnt ~ yr + mnth + windspeed + hum + temp + workingday,
  data = traindata
)
pred1 <- predict(lmmod, newdata = testdata)
# Print the model summary, including the correlation matrix of the coefficients.
summary(lmmod, correlation = TRUE)
##
## Call:
## lm(formula = cnt ~ yr + mnth + windspeed + hum + temp + workingday,
## data = traindata)
##
## Residuals:
## Min 1Q Median 3Q Max
## -4508.7 -376.2 98.8 557.5 2688.2
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2635.17 271.60 9.703 < 2e-16 ***
## yr 1998.53 75.18 26.582 < 2e-16 ***
## mnth2 120.61 189.30 0.637 0.524278
## mnth3 702.96 201.50 3.489 0.000523 ***
## mnth4 1297.08 222.52 5.829 9.36e-09 ***
## mnth5 1469.05 254.71 5.768 1.32e-08 ***
## mnth6 1114.12 290.33 3.837 0.000138 ***
## mnth7 560.12 316.74 1.768 0.077535 .
## mnth8 1019.87 298.32 3.419 0.000674 ***
## mnth9 1686.52 264.91 6.366 4.00e-10 ***
## mnth10 1839.15 219.47 8.380 4.19e-16 ***
## mnth11 1264.79 201.82 6.267 7.29e-10 ***
## mnth12 976.49 187.81 5.199 2.80e-07 ***
## windspeed -4647.40 508.40 -9.141 < 2e-16 ***
## hum -3120.22 289.37 -10.783 < 2e-16 ***
## temp 5220.01 503.37 10.370 < 2e-16 ***
## workingday 171.53 81.19 2.113 0.035065 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 888.5 on 567 degrees of freedom
## Multiple R-squared: 0.794, Adjusted R-squared: 0.7882
## F-statistic: 136.6 on 16 and 567 DF, p-value: < 2.2e-16
##
## Correlation of Coefficients:
## (Intercept) yr mnth2 mnth3 mnth4 mnth5 mnth6 mnth7 mnth8 mnth9
## yr -0.19
## mnth2 -0.27 0.02
## mnth3 -0.17 0.02 0.53
## mnth4 -0.09 0.05 0.51 0.61
## mnth5 0.00 0.07 0.49 0.62 0.68
## mnth6 0.00 0.08 0.45 0.61 0.69 0.77
## mnth7 0.01 0.12 0.44 0.60 0.69 0.78 0.82
## mnth8 0.02 0.09 0.45 0.61 0.69 0.78 0.81 0.83
## mnth9 0.03 0.08 0.48 0.62 0.68 0.76 0.77 0.79 0.79
## mnth10 -0.07 0.05 0.52 0.62 0.65 0.71 0.69 0.70 0.71 0.71
## mnth11 -0.20 0.04 0.52 0.57 0.57 0.59 0.56 0.56 0.57 0.59
## mnth12 -0.23 0.00 0.54 0.56 0.54 0.54 0.50 0.49 0.50 0.54
## windspeed -0.54 0.04 -0.01 0.00 -0.05 0.03 0.04 0.07 0.06 0.06
## hum -0.60 0.15 0.00 0.03 0.07 0.01 0.13 0.13 0.09 -0.03
## temp -0.29 -0.14 -0.17 -0.40 -0.54 -0.68 -0.77 -0.82 -0.78 -0.70
## workingday -0.15 0.02 0.00 0.01 0.05 0.04 0.03 0.07 0.03 0.05
## mnth10 mnth11 mnth12 windspeed hum temp
## yr
## mnth2
## mnth3
## mnth4
## mnth5
## mnth6
## mnth7
## mnth8
## mnth9
## mnth10
## mnth11 0.60
## mnth12 0.58 0.56
## windspeed 0.07 0.07 0.07
## hum -0.06 -0.03 -0.10 0.19
## temp -0.53 -0.33 -0.22 0.02 -0.19
## workingday 0.04 0.02 0.03 0.00 -0.03 -0.08
## 2.5 % 97.5 %
## (Intercept) 2101.71462 3168.6343
## yr 1850.86068 2146.2033
## mnth2 -251.19573 492.4198
## mnth3 307.18582 1098.7429
## mnth4 860.01376 1734.1533
## mnth5 968.76147 1969.3405
## mnth6 543.86198 1684.3717
## mnth7 -62.01019 1182.2469
## mnth8 433.92378 1605.8081
## mnth9 1166.18822 2206.8434
## mnth10 1408.06850 2270.2287
## mnth11 868.38334 1661.1915
## mnth12 607.60203 1345.3819
## windspeed -5645.98076 -3648.8185
## hum -3688.58593 -2551.8488
## temp 4231.30386 6208.7085
## workingday 12.05783 330.9966
## [1] 0.1949675
# Put the observed cnt values from testdata next to the model predictions.
actuals_preds <- data.frame(
  cbind(actuals = testdata$cnt, predicteds = pred1)
)
# Correlation matrix between the observed and the predicted values.
correlation_accuracy <- cor(actuals_preds)
correlation_accuracy
## actuals predicteds
## actuals 1.0000000 0.9012364
## predicteds 0.9012364 1.0000000
# Min-max accuracy: for each row, divide the smaller of (actual, predicted) by
# the larger, then average those ratios over all rows.
# pmin()/pmax() compute the row-wise minimum and maximum elementwise on the two
# columns, which avoids a row-by-row apply() that first coerces the data frame
# to a matrix.
min_max_accuracy <- with(
  actuals_preds,
  mean(pmin(actuals, predicteds) / pmax(actuals, predicteds))
)
min_max_accuracy
## [1] 0.8572675
## [1] 0.2161647
## Hence, the min-max accuracy of the model on the test data is about 0.86.
# Residual plots ----
# Extract the residuals of the fitted model and draw their histogram
# with plotNormalHistogram().
x <- residuals(lmmod)
plotNormalHistogram(x)